import torch
import torch_npu  # registers the Ascend NPU backend ("npu" device) with PyTorch
torch.npu.set_compile_mode(jit_compile=False)  # use pre-compiled operator binaries instead of online JIT compilation
from transformers import AutoTokenizer, AutoModel

model_dir = 'chatglm3-6b'  # path to the ChatGLM3-6B weights
print("model_dir:", model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
torch_device = "npu:0"  # first Ascend NPU card
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).to(torch_device)
model = model.eval()  # inference mode: disables dropout, etc.
# Single-turn chat; "你好" means "Hello"
response, history = model.chat(tokenizer, "你好", history=[])
print(response)

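# A minimal multi-turn sketch, assuming model.chat accepts the returned `history`
# list for follow-up queries as in the ChatGLM3 examples; the English prompt below
# is purely illustrative.
response, history = model.chat(tokenizer, "Please introduce yourself", history=history)
print(response)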